It’s no secret that there are wide disparities in the funding of schools in the United States. Constitutionally, education is a reserved power of the states, so the federal government is limited, policy-wise, in how it can intervene in educational matters. Since the Great Society legislation of the 1960s, the federal role in education has been supplemental funding for schools serving low income students. Culturally, there is a strong tradition of “local control” over schools–schools are administered and funded at the district level. Most school funding comes from state and local sources. How involved state governments are vary greatly from state to state.
First up, load the necessary libraries and I like to define a custom ggplot theme. I use the tigris package to get geographic data on school district boundaries. Using options(tigris_use_cache = TRUE) creates a cache of these data to prevent the need to redownload.
library(tidyverse)
library(tidycensus)
library(educationdata)
library(tigris)
library(sf)
options(tigris_use_cache = TRUE)
theme_set(
theme_minimal() +
theme(
text = element_text(family = "Lato"),
axis.title.x = element_text(size = 12, hjust = 1),
axis.title.y = element_text(size = 12),
axis.text.x = element_text(size = 10),
axis.text.y = element_text(size = 10),
plot.title = element_text(size = 12, colour = "grey25", face = "bold"),
plot.subtitle = element_text(size = 10, colour = "grey45"),
legend.position = "bottom",
legend.box = "vertical",
legend.margin=margin(),
legend.text = element_text(size = 8),
legend.title = element_text(size = 10, vjust = 0),
legend.key.size = unit(0.5, "cm")
)
)
Data come from the National Center for Education Statistics’ (NCES) Common Core of Data, which contains administrative, demographic, and financial data for all public school districts in the country. I get information on poverty in school districts through the Small Area Income and Poverty Estimates. I get these data using the API (and associated package) developed by the Urban Institute. Geographic information I get from the tirgis package, which accesses the US Census API. Lastly, I get data on meida income in school districts from the American Communities Survey, again accessed through the Census API using the tidycensus package. tidycensus requires and API key and developer account with the Census. These data take some time to download, so go grab some coffee or mix up a negroni for yourself and for me.
district_funding <-
get_education_data(level = "school-districts",
source = "ccd",
topic = "finance",
filters = list(year = 2015)) %>%
na_if(-1) %>%
na_if(-2) %>%
na_if(-3) %>%
mutate(leaid = as.character(as.numeric(leaid)))
d_dir <-
get_education_data(level = "school-districts",
source = "ccd",
topic = "directory",
filters = list(year = 2015)) %>%
na_if(-1) %>%
na_if(-2) %>%
na_if(-3) %>%
mutate(leaid = as.character(as.numeric(leaid)))
d_pov <-
get_education_data(level = "school-districts",
source = "saipe",
filters = list(year = 2015)) %>%
mutate(leaid = as.character(as.numeric(leaid))) %>%
select(leaid, contains("est"))
d_enrollment <-
get_education_data(level = "school-districts",
source = "ccd",
topic = "enrollment",
by = "race",
filters = list(year = 2015, grade = 99)) %>%
mutate(
race = case_when(
race == 1 ~ "white",
race == 2 ~ "black",
race == 3 ~ "hispanic",
race == 4 ~ "asian",
race == 5 ~ "other",
race == 6 ~ "other",
race == 7 ~ "other",
race == 8 ~ "other",
race == 9 ~ "other",
race == 20 ~ "other",
race == 99 ~ "total"
)
) %>%
pivot_wider(id_cols = c(leaid, year),
names_from = race,
values_from = enrollment,
values_fn = list(enrollment = ~(sum(., na.rm = TRUE)))) %>%
mutate(leaid = as.character(as.numeric(leaid)),
nonwhite = total - white) %>%
mutate_at(vars(white:other, nonwhite), list(prop = ~ ./total))
fips <- fips_codes %>% select(state, state_code) %>% distinct() %>% filter(state_code <= 56) %>% pull(state_code)
district_geo <-
map(fips, ~ school_districts(.x, year = 2015, class = "sf"))
district_geo <- reduce(district_geo, rbind)
median_income <- map(fips, ~
get_acs(geography = "school district (unified)",
variables = "B19013_001",
year = 2015,
state = .x)
) %>%
bind_rows() %>%
select(-NAME)
district_geo <- left_join(district_geo, median_income, by = "GEOID")
district_funding <-
district_funding %>%
left_join(d_dir %>% select(leaid, lea_name, city_location, state_location)) %>%
left_join(d_enrollment) %>%
left_join(d_pov) %>%
inner_join(district_geo %>%
select(GEOID, estimate, geometry) %>%
mutate(GEOID = as.character(as.numeric(GEOID))),
by = c("leaid" = "GEOID"))
Where does school funding come from?
district_funding %>%
drop_na(state_location) %>%
select(fips, leaid, state_location,
rev_fed_total,
rev_state_total, rev_local_total,
enrollment_fall_responsible,
estimate) %>%
group_by(state_location) %>%
summarize_at(vars(contains("rev")), sum, na.rm = TRUE) %>%
pivot_longer(cols = c(-state_location)) %>%
group_by(state_location) %>%
mutate(prop_value = value/sum(value, na.rm = TRUE)) %>%
ggplot(aes(x = state_location, y = prop_value, group = name)) +
geom_col(aes(fill = name), alpha = 0.7) +
scale_fill_viridis_d(name = "Revenue Source", labels = c("Federal", "Local", "State")) +
labs(x = "", y = "Share", title = "Revenue Source by State",
caption = "Source: My calculations using data from the Common Core of Data, accessed through the Urban Institute's API.") +
coord_flip()
district_funding %>%
select(fips, leaid, city_location,
rev_total, rev_fed_total,
rev_state_total, rev_local_total,
enrollment_fall_responsible,
estimate, nonwhite_prop) %>%
filter(rev_total != 0,
rev_fed_total != 0,
rev_state_total != 0,
rev_local_total != 0,
enrollment_fall_responsible != 0) %>%
drop_na(estimate) %>%
mutate_at(vars(enrollment_fall_responsible, contains("rev")), log) %>%
mutate(income_ntile = ntile(estimate, 5)) %>%
pivot_longer(cols = c(-fips, -leaid, -city_location, -enrollment_fall_responsible, -income_ntile, -estimate, -nonwhite_prop)) %>%
mutate(name = case_when(
name == "rev_fed_total" ~ "Total federal revenue",
name == "rev_state_total" ~ "Total state revenue",
name == "rev_local_total" ~ "Total local revenue",
name == "rev_total" ~ "Total revenue",
)) %>%
mutate(name = factor(name, levels = c("Total federal revenue",
"Total state revenue",
"Total local revenue",
"Total revenue"))) %>%
ggplot(aes(x = enrollment_fall_responsible, y = value)) +
geom_point(aes(color = factor(income_ntile)), alpha = 0.5) +
geom_smooth(method = "lm", color = "plum4") +
scale_color_viridis_d(name = "Income quantile") +
labs(x = "Enrollment", y = "") +
facet_wrap(~ name)
district_funding %>%
select(fips, leaid, city_location,
rev_total, rev_fed_total,
rev_state_total, rev_local_total,
enrollment_fall_responsible,
estimate, nonwhite_prop) %>%
filter(rev_total != 0,
rev_fed_total != 0,
rev_state_total != 0,
rev_local_total != 0,
enrollment_fall_responsible != 0) %>%
drop_na(estimate) %>%
mutate_at(vars(enrollment_fall_responsible, contains("rev")), log) %>%
mutate(income_ntile = ntile(estimate, 5)) %>%
pivot_longer(cols = c(-fips, -leaid, -city_location, -enrollment_fall_responsible, -income_ntile, -estimate, -nonwhite_prop)) %>%
mutate(name = case_when(
name == "rev_fed_total" ~ "Total federal revenue",
name == "rev_state_total" ~ "Total state revenue",
name == "rev_local_total" ~ "Total local revenue",
name == "rev_total" ~ "Total revenue",
)) %>%
mutate(name = factor(name, levels = c("Total federal revenue",
"Total state revenue",
"Total local revenue",
"Total revenue"))) %>%
ggplot(aes(x = enrollment_fall_responsible, y = value)) +
geom_point(aes(color = factor(income_ntile), size = nonwhite_prop), alpha = 0.5) +
geom_smooth(method = "lm", color = "plum4") +
scale_color_viridis_d(name = "Income quantile") +
scale_size(name = "Share of students of color",
range = c(0.1, 5)) +
labs(x = "Enrollment", y = "",
title = "Revenue source and enrollement",
caption = "Source: My calculations using data from the Common Core of Data, accessed through the Urban Institute's API.") +
facet_wrap(~ name)
district_funding %>%
select(fips, leaid, city_location,
rev_total, rev_fed_total,
rev_state_total, rev_local_total,
enrollment_fall_responsible,
estimate, nonwhite_prop, nonwhite) %>%
filter(rev_total != 0,
rev_fed_total != 0,
rev_state_total != 0,
rev_local_total != 0,
enrollment_fall_responsible != 0) %>%
drop_na(estimate) %>%
mutate_at(vars(enrollment_fall_responsible, contains("rev")), log) %>%
mutate(income_ntile = ntile(estimate, 5)) %>%
pivot_longer(cols = c(-fips, -leaid, -city_location,
-enrollment_fall_responsible, -income_ntile,
-estimate, -nonwhite_prop, -nonwhite)) %>%
mutate(name = case_when(
name == "rev_fed_total" ~ "Total federal revenue",
name == "rev_state_total" ~ "Total state revenue",
name == "rev_local_total" ~ "Total local revenue",
name == "rev_total" ~ "Total revenue",
)) %>%
mutate(name = factor(name, levels = c("Total federal revenue",
"Total state revenue",
"Total local revenue",
"Total revenue"))) %>%
ggplot(aes(x = nonwhite, y = value)) +
geom_point(aes(color = factor(income_ntile), size = enrollment_fall_responsible), alpha = 0.5) +
geom_smooth(method = "lm", color = "plum4") +
scale_color_viridis_d(name = "Income quantile") +
scale_size(name = "Share of students of color",
range = c(0.1, 5)) +
labs(x = "Enrollment", y = "",
title = "Revenue source and enrollement",
caption = "Source: My calculations using data from the Common Core of Data, accessed through the Urban Institute's API.") +
facet_wrap(~ name)
district_funding %>%
select(fips, leaid, city_location,
rev_total, rev_fed_total,
rev_state_total, rev_local_total,
enrollment_fall_responsible,
estimate, nonwhite_prop, nonwhite) %>%
filter(rev_total != 0,
rev_fed_total != 0,
rev_state_total != 0,
rev_local_total != 0,
enrollment_fall_responsible != 0) %>%
drop_na(estimate) %>%
mutate_at(vars(enrollment_fall_responsible, estimate, contains("rev")), log) %>%
pivot_longer(cols = c(-fips, -leaid, -city_location,
-enrollment_fall_responsible,
-estimate, -nonwhite_prop, -nonwhite)) %>%
mutate(name = case_when(
name == "rev_fed_total" ~ "Total federal revenue",
name == "rev_state_total" ~ "Total state revenue",
name == "rev_local_total" ~ "Total local revenue",
name == "rev_total" ~ "Total revenue",
)) %>%
mutate(name = factor(name, levels = c("Total federal revenue",
"Total state revenue",
"Total local revenue",
"Total revenue"))) %>%
ggplot(aes(x = estimate, y = value)) +
geom_point(aes(color = nonwhite_prop), alpha = 0.5) + geom_smooth(method = "lm", color = "plum4") +
scale_color_viridis_c(name = "Share of students of color") +
labs(x = "Log of Median Income", y = "",
title = "Revenue source and district median income",
caption = "Source: My calculations using data from the Common Core of Data, accessed through the Urban Institute's API.") +
facet_wrap(~ name)
district_funding %>%
select(fips, leaid, city_location,
rev_total, rev_fed_total,
rev_state_total, rev_local_total,
enrollment_fall_responsible,
estimate, nonwhite_prop, nonwhite) %>%
filter(rev_total != 0,
rev_fed_total != 0,
rev_state_total != 0,
rev_local_total != 0,
enrollment_fall_responsible != 0) %>%
drop_na(estimate) %>%
mutate_at(vars(contains("rev")),
list(per_student = ~ ./enrollment_fall_responsible)) %>%
select(-ends_with("total")) %>%
mutate_at(vars(enrollment_fall_responsible, estimate, contains("per_student")), log) %>%
pivot_longer(cols = c(-fips, -leaid, -city_location,
-enrollment_fall_responsible,
-estimate, -nonwhite_prop, -nonwhite)) %>%
mutate(name = case_when(
name == "rev_fed_total_per_student" ~ "Total per student federal revenue",
name == "rev_state_total_per_student" ~ "Total per student state revenue",
name == "rev_local_total_per_student" ~ "Total per student local revenue",
name == "rev_total_per_student" ~ "Total per student revenue",
)) %>%
mutate(name = factor(name, levels = c("Total per student federal revenue",
"Total per student state revenue",
"Total per student local revenue",
"Total per student revenue"))) %>%
ggplot(aes(x = nonwhite_prop, y = value)) +
geom_point(aes(color = estimate), alpha = 0.5) +
geom_smooth(method = "lm", color = "plum4") +
scale_color_viridis_c(name = "Logged Median Income") +
labs(x = "Share of students of color", y = "",
title = "Per student revenue by share of students of color",
caption = "Source: My calculations using data from the Common Core of Data, accessed through the Urban Institute's API.") +
facet_wrap(~ name, scales = "free_y")
district_funding %>%
select(fips, leaid, city_location,
rev_total, rev_fed_total,
rev_state_total, rev_local_total,
enrollment_fall_responsible,
estimate, nonwhite_prop, nonwhite, est_population_5_17_poverty_pct) %>%
filter(rev_total != 0,
rev_fed_total != 0,
rev_state_total != 0,
rev_local_total != 0,
enrollment_fall_responsible != 0) %>%
drop_na(estimate) %>%
mutate_at(vars(contains("rev")),
list(per_student = ~ ./enrollment_fall_responsible)) %>%
select(-ends_with("total")) %>%
mutate_at(vars(enrollment_fall_responsible, estimate, contains("per_student")), log) %>%
pivot_longer(cols = c(-fips, -leaid, -city_location,
-enrollment_fall_responsible,
-estimate, -nonwhite_prop, -nonwhite, -est_population_5_17_poverty_pct)) %>%
mutate(name = case_when(
name == "rev_fed_total_per_student" ~ "Total per student federal revenue",
name == "rev_state_total_per_student" ~ "Total per student state revenue",
name == "rev_local_total_per_student" ~ "Total per student local revenue",
name == "rev_total_per_student" ~ "Total per student revenue",
)) %>%
mutate(name = factor(name, levels = c("Total per student federal revenue",
"Total per student state revenue",
"Total per student local revenue",
"Total per student revenue"))) %>%
ggplot(aes(x = nonwhite_prop, y = value)) +
geom_point(aes(color = est_population_5_17_poverty_pct), alpha = 0.5) +
geom_smooth(method = "lm", color = "plum4") +
scale_color_viridis_c(name = "% in poverty") +
labs(x = "Share of students of color", y = "",
title = "Per student revenue by share of students of color",
caption = "Source: My calculations using data from the Common Core of Data, accessed through the Urban Institute's API.") +
facet_wrap(~ name, scales = "free_y")
district_funding %>%
select(fips, leaid, city_location,
rev_total, rev_fed_total,
rev_state_total, rev_local_total,
enrollment_fall_responsible,
estimate, nonwhite_prop, nonwhite, est_population_5_17_poverty_pct) %>%
filter(rev_total != 0,
rev_fed_total != 0,
rev_state_total != 0,
rev_local_total != 0,
enrollment_fall_responsible != 0) %>%
drop_na(estimate) %>%
mutate_at(vars(contains("rev")),
list(per_student = ~ ./enrollment_fall_responsible)) %>%
select(-ends_with("total")) %>%
mutate_at(vars(enrollment_fall_responsible, estimate, contains("per_student")), log) %>%
pivot_longer(cols = c(-fips, -leaid, -city_location,
-enrollment_fall_responsible,
-estimate, -nonwhite_prop, -nonwhite, -est_population_5_17_poverty_pct)) %>%
mutate(name = case_when(
name == "rev_fed_total_per_student" ~ "Total per student federal revenue",
name == "rev_state_total_per_student" ~ "Total per student state revenue",
name == "rev_local_total_per_student" ~ "Total per student local revenue",
name == "rev_total_per_student" ~ "Total per student revenue",
)) %>%
mutate(name = factor(name, levels = c("Total per student federal revenue",
"Total per student state revenue",
"Total per student local revenue",
"Total per student revenue"))) %>%
ggplot(aes(x = est_population_5_17_poverty_pct, y = value)) +
geom_point(aes(color = nonwhite_prop), alpha = 0.5) +
geom_smooth(method = "lm", color = "plum4") +
scale_color_viridis_c(name = "%students of color") +
labs(x = "%5-17 in poverty", y = "",
title = "Per student revenue by % in poverty",
caption = "Source: My calculations using data from the Common Core of Data, accessed through the Urban Institute's API.") +
facet_wrap(~ name, scales = "free_y")